### ---- SCRIPT 00: The purpose of this script is to reconfigure the raw validation survey data provided by ---- ###
### ---- YouGov to produce an aggregated summary of the data ready for analysis.   ---- ###

# Penalise scientific notation heavily so numbers are printed in fixed notation.
options(scipen = 999)

#### ---- LIBRARIES ---- ####

library(dplyr)
library(foreign)

#### ---- LOAD DATA ---- ####

# Read the raw YouGov SPSS export into a data frame. Value labels are not
# applied, so labelled variables are not converted into factors.
yougov_data <- read.spss(
  file = "YouGov_polling_ideology_scores_RAW.sav",
  use.value.labels = FALSE,
  to.data.frame = TRUE
)

### -- RECONFIGURE THE SURVEY DATA -- ### 

# Set column names: use the SPSS variable labels that read.spss() attaches to
# the data frame as the "variable.labels" attribute.
variable_labels <- attr(yougov_data, "variable.labels")

if (is.null(variable_labels) ||
    length(variable_labels) != ncol(yougov_data)) {
  stop("Expected one SPSS variable label per column of yougov_data.",
       call. = FALSE)
}

# A variable without a usable label keeps its original SPSS name, and repeated
# labels are made unique: dplyr refuses to transform a data frame whose column
# names are blank, NA or duplicated, so the recoding step below would fail.
variable_labels <- unname(as.character(variable_labels))
unlabelled <- is.na(variable_labels) | !nzchar(variable_labels)
variable_labels[unlabelled] <- names(yougov_data)[unlabelled]
names(yougov_data) <- make.unique(variable_labels)

# Set missing values to NA: the codes 11, 98 and 99 are recoded in every column.
# NOTE(review): presumably these are the survey's non-response codes - confirm
# against the YouGov codebook.
yougov_data <- yougov_data %>%
  mutate(across(everything(), function(column) {
    ifelse(column %in% c(11, 98, 99), NA, column)
  }))

# Calculate summary statistics for each ideology variable in the dataset

# Return the most frequent non-missing value of a vector (its mode).
#
# v: an atomic vector; NA entries are ignored.
#
# Returns a length-one vector holding the most frequent value. When several
# values are tied, the one that occurs first in `v` is returned. If `v` has no
# non-missing values, a single NA of the same type is returned, so callers
# always receive exactly one value.
getmode <- function(v) {
  v <- v[!is.na(v)]
  uniqv <- unique(v)
  if (length(uniqv) == 0L) {
    # Indexing with NA yields a length-one NA of the vector's own type.
    return(uniqv[NA_integer_])
  }
  # match() maps each value to its position in uniqv; tabulate() counts them.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Summarise one survey variable as a single-row data frame.
#
# variable: a numeric vector of responses; NA entries are ignored.
#
# Returns a data frame with one row and the columns count (number of
# non-missing values), mean, median, mode, sd, min and max. When every value
# is missing, count is 0 and all other statistics are NA, rather than the
# function failing or reporting Inf/-Inf/NaN.
get_summary_stats <- function(variable) {
  observed <- variable[!is.na(variable)]

  if (length(observed) == 0L) {
    return(data.frame(count = 0L,
                      mean = NA_real_,
                      median = NA_real_,
                      mode = NA_real_,
                      sd = NA_real_,
                      min = NA_real_,
                      max = NA_real_))
  }

  data.frame(count = length(observed),
             mean = mean(observed),
             median = median(observed),
             mode = getmode(observed),
             sd = sd(observed),
             min = min(observed),
             max = max(observed))
}

# Calculate the summary statistics for each ideology variable, i.e. every
# column of the YouGov dataset from the 16th onwards.
first_ideology_col <- 16L
if (ncol(yougov_data) < first_ideology_col) {
  stop("yougov_data has no ideology columns (expected at least ",
       first_ideology_col, " columns).", call. = FALSE)
}
ideology_cols <- seq(first_ideology_col, ncol(yougov_data))

# Build one summary row per variable and bind them in a single step instead of
# growing the data frame inside a loop.
summary_rows <- lapply(yougov_data[ideology_cols], get_summary_stats)
yougov_summary_stats <- do.call(rbind, unname(summary_rows))
rownames(yougov_summary_stats) <- NULL

# Reapply the variable names and reorder to the first column. The names are
# taken from the full name vector so that a single ideology column still
# yields a name.
yougov_summary_stats$variable <- names(yougov_data)[ideology_cols]
yougov_summary_stats <- yougov_summary_stats %>% select(variable, everything())

# Drop unnecessary rows
# NOTE(review): rows 7 and 32 are removed by position; which variables they
# correspond to is not recorded here - confirm if the column order changes.
rows_to_drop <- c(7, 32)
yougov_summary_stats <- yougov_summary_stats[-rows_to_drop, ]

# Save YouGov summary data. Base write.csv() is used because data.table (the
# package that provides fwrite()) is not loaded by this script. Row names are
# omitted and NA is written as an empty field, matching fwrite()'s defaults.
write.csv(yougov_summary_stats, "yougov_summary_data.csv",
          row.names = FALSE, na = "")

#### ---- END ---- ####